## Ranges of Resistance R Script
## Uma Ilavarasan, Katherine Irajpanah, and Kevin Troy

#########################################################################
##                       LOAD DATA AND LIBRARIES                       ##
#########################################################################

# set working directory as needed 

library(haven) # for reading data
library(lfe) # for fixed effects
library(miceadds) # for glm cluster
library(dplyr)
library(ggplot2)
library(MASS)
library(mvtnorm) 
library(stargazer)
library(gridExtra) # for plot arrangement
library(gtable)
library(grid)
library(corrplot) # for correlation plot
library(mediation) # for mediation analysis
library(xtable) # for "balance" tables

dat <- read.csv("IIT_2001_data.csv")
med <- read.csv('med.csv')

## add logged variables and correct for 0s
dat[which(dat$area_scale==0), c("area_scale")] <- .01
dat[which(dat$pop_density==0), c("pop_density")] <- .01
dat[which(dat$pop_density_t1==0), c("pop_density_t1")] <- .01
dat$larea_scale <- log(dat$area_scale)
dat$lpop_density <- log(dat$pop_density)
dat$lpop_density_t1 <- log(dat$pop_density_t1)

## create single indicator variable that communicates whether a province experienced a 
## violent MCR event (0) or a nonviolent MCR event (1) in a given year
dat <- dat %>%
  mutate(v_nv = ifelse(civilwar_n1 == 1, 0,
                       ifelse(prim_meth == 1, 1, NA)))

#########################################################################
##                        DESCRIPTIVE STATISTICS                       ##
#########################################################################

## make summary violin plots
rugg_vp <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(prim_meth)) %>%
  ggplot(aes(y = rugg_var, x = factor(prim_meth))) +
  geom_violin(aes(fill = factor(prim_meth))) +
  geom_boxplot(width = 0.1) +
  ylab('variance in ruggedness') +
  theme(legend.position = 'right', axis.title.x = element_blank()) +
  scale_fill_discrete(name = 'MCR event', labels = c('no (0)', 'yes (1)'))

rugged_vp <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(prim_meth)) %>%
  ggplot(aes(y = rugged, x = factor(prim_meth))) +
  geom_violin(aes(fill = factor(prim_meth))) +
  geom_boxplot(width = 0.1) +
  ylab('mean ruggedness') +
  theme(legend.position = 'none', axis.title.x = element_blank())

ethgroups_vp <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(prim_meth)) %>%
  ggplot(aes(y = ethgroups, x = factor(prim_meth))) +
  geom_violin(aes(fill = factor(prim_meth))) +
  geom_boxplot(width = 0.1) +
  ylab('total number of groups') +
  theme(legend.position = 'none', axis.title.x = element_blank())

excluded_vp <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(prim_meth)) %>%
  ggplot(aes(y = excluded, x = factor(prim_meth))) +
  geom_violin(aes(fill = factor(prim_meth))) +
  geom_boxplot(width = 0.1) +
  ylab('number of groups excluded') +
  theme(legend.position = 'none', axis.title.x = element_blank())

capdist_vp <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(prim_meth)) %>%
  ggplot(aes(y = cap_dist000, x = factor(prim_meth))) +
  geom_violin(aes(fill = factor(prim_meth))) +
  geom_boxplot(width = 0.1) +
  ylab('distance to capitol') +
  theme(legend.position = 'none', axis.title.x = element_blank())

area_vp <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(prim_meth)) %>%
  ggplot(aes(y = larea_scale, x = factor(prim_meth))) +
  geom_violin(aes(fill = factor(prim_meth))) +
  geom_boxplot(width = 0.1) +
  ylab('log unit area') +
  theme(legend.position = 'none', axis.title.x = element_blank())

gdp_vp <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(prim_meth)) %>%
  ggplot(aes(y = lngdp_pc, x = factor(prim_meth))) +
  geom_violin(aes(fill = factor(prim_meth))) +
  geom_boxplot(width = 0.1) +
  ylab('log gdp per capita') +
  theme(legend.position = 'none', axis.title.x = element_blank())

pop_vp <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(prim_meth)) %>%
  ggplot(aes(y = lpop_density, x = factor(prim_meth))) +
  geom_violin(aes(fill = factor(prim_meth))) +
  geom_boxplot(width = 0.1) +
  ylab('log population density') +
  theme(legend.position = 'none', axis.title.x = element_blank())

durable_vp <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(prim_meth)) %>%
  ggplot(aes(y = durable0, x = factor(prim_meth))) +
  geom_violin(aes(fill = factor(prim_meth))) +
  geom_boxplot(width = 0.1) +
  ylab('regime durability') +
  theme(legend.position = 'none', axis.title.x = element_blank())

## combine violin plots into a single object (Figure 1)
legend <- gtable_filter(ggplotGrob(rugg_vp), 'guide-box')

arrange_vp <- grid.arrange(arrangeGrob(rugg_vp + theme(legend.position = 'none'),
                                       rugged_vp, ethgroups_vp, excluded_vp, capdist_vp,
                                       area_vp, gdp_vp, pop_vp, durable_vp,
                                       nrow = 3),
                           legend, widths = unit.c(unit(1, 'npc') - legend$width, legend$width), 
                           nrow = 1)

## plot number of provinces where an MCR campaign is taking place in a given year (not in outline)
cpy_p <- dat %>%
  filter(prim_meth == 1) %>%
  ggplot(aes(x = year)) +
  geom_bar() +
  ylab('Number of Provinces with MCR Campaigns') +
  scale_x_continuous(name = 'Year', breaks = seq(1989, 2008, 1))

## plot share of provinces where an MCR campaign is taking place in a given year
cpy_prop_p <- dat %>%
  group_by(year) %>%
  summarise(nv_res = sum(prim_meth == 1, na.rm = TRUE),
            no_res = sum(prim_meth == 0, na.rm = TRUE),
            n = sum(!is.na(prim_meth))) %>%
  mutate(nv_res_prop = nv_res/n) %>%
  ggplot(aes(x = year, y = nv_res_prop)) +
  geom_bar(stat = 'identity') + 
  scale_x_continuous(name = 'Year', breaks = seq(1989, 2008, 1)) +
  ylab('Proportion of Provinces with MCR Campaigns')

## plot frequency distribution of number of provinces where an MCR campaign is taking
## place in a given year
cpy_dens_p <- dat %>%
  group_by(year) %>%
  summarise(nv_res = sum(prim_meth == 1, na.rm = TRUE),
            no_res = sum(prim_meth == 0, na.rm = TRUE),
            v_res = sum(civilwar_n1 == 1, na.rm = TRUE),
            n = sum(!is.na(prim_meth))) %>%
  mutate(nv_res_prop = nv_res/n,
         v_res_prop = v_res/n) %>%
  ggplot(aes(x = nv_res)) +
  geom_density(color = 'red') +
  geom_density(aes(x = v_res), color = 'blue') + 
  xlab('Number of Provinces with Maximalist Campaigns in a Given Year')

## combine campaign-province-year plots into a single object (not in final paper)
arrange_cpy <- grid.arrange(arrangeGrob(cpy_p + theme(axis.title.x = element_blank()),
                                        cpy_prop_p +  scale_y_continuous(name = 'Proportion of Provinces with MCR Campaigns', 
                                                                         limits = c(0, 0.5), breaks = seq(0, 0.5, 0.05))+
                                          theme(axis.title.x = element_blank()),
                                        nrow = 2,
                                        bottom = textGrob('Year', gp = gpar(fontsize = 12)),
                                        top = textGrob('Year-to-year variation in MCR campaigns', 
                                                       vjust = 1, gp = gpar(fontface = 'bold', cex = 1.5))))

## combine campaign-province-year proportion plot and density plot into a single object (Figure 2)
arrange_cpy_2 <- grid.arrange(arrangeGrob(cpy_prop_p, cpy_dens_p, nrow = 2))

#########################################################################
##                     PREDICTED PROBABILITY PLOTS                     ##
#########################################################################

## use simulation to generate predicted probabilities of nonviolent
## campaigns by (a) log population density, and (b) variance of terrain
## ruggedness

# fit logit model
p3_1 <- glm(prim_meth ~ rugg_var + ethgroups + cap_dist000 +  
              area_scale + democ_dum_t1 + durable0_t1 + 
              democ_dur_t1 + lngdp_pc_t1 + pop_density_t1, 
            family = binomial,
            data = dat)

set.seed(02138)

## get medians of covariates  
median_covariates <- apply(dat %>%
                             filter(province_data==1)  %>% 
                             dplyr::select(rugg_var, ethgroups,
                                           cap_dist000, area_scale, 
                                           democ_dum_t1, durable0_t1,
                                           democ_dur_t1, lngdp_pc_t1,
                                           pop_density_t1),
                           MARGIN = 2,
                           function(x) median(x, na.rm = T))

# add intercept 
median_covariates <- c(1, median_covariates)

# simulate from multivariate normal
simulated_beta <- rmvnorm(5000, mean = coef(p3_1), sigma = vcov(p3_1))

# looping over range of population density
pop_density <- seq(0, 40, length.out = 50)
plot_pop_density <- sapply(pop_density, function(i){
  xb_i <- median_covariates
  xb_i[names(xb_i) == 'pop_density_t1'] <- i
  pr_i <- plogis(simulated_beta %*% xb_i)
  return(c(i,quantile(pr_i, c(0.025, 0.5, 0.975)), mean(pr_i)))
})

df_popden <- data.frame(t(plot_pop_density))
names(df_popden) <- c('population density', 'p025', 'median', 
                       'p975', 'mean')

# plot
p <- ggplot(df_popden, aes(x = `population density`, y = mean)) +
  geom_line() +
  geom_ribbon(aes(x = `population density`, 
                  ymin = p025, ymax = p975), alpha =0.25) +
  xlab('Population Density') + 
  ylab('Probability of Civil Resistance Campaign') +
  theme_classic() +
  theme(axis.text=element_text(size=14),
        axis.title=element_text(size=14,face="bold"))
p

## looping over range of variance in ruggedness
rug_var <- seq(0, 5, length.out = 50)
plot_rug_var <- sapply(rug_var, function(i){
  xb_i <- median_covariates
  xb_i[names(xb_i) == 'rugg_var'] <- i
  pr_i <- plogis(simulated_beta %*% xb_i)
  return(c(i,quantile(pr_i, c(0.025, 0.5, 0.975)), mean(pr_i)))
})

df_rug_var <- data.frame(t(plot_rug_var))
names(df_rug_var) <- c('var ruggedness', 'p025', 'median', 'p975', 'mean')

## plot
v <- ggplot(df_rug_var, aes(x = `var ruggedness`, y = mean)) +
  geom_line() +
  geom_ribbon(aes(x = `var ruggedness`, 
                  ymin = p025, ymax = p975), alpha =0.25) +
  xlab('Variance in Ruggedness') + 
  ylab('Probability of Civil Resistance Campaign') +
  ylim(0,.1) +
  theme_classic() +
  theme(axis.text=element_text(size=14),
        axis.title=element_text(size=14,face="bold"))
v


#########################################################################
##                          COEFFICIENT PLOT                           ##
#########################################################################

## glm fixed effects model
f3_1 <- glm.cluster(prim_meth ~ rugg_var + ethgroups + cap_dist000 +  
                      area_scale + democ_dum_t1 + durable0_t1 + 
                      democ_dur_t1 + lngdp_pc_t1 + pop_density_t1 +
                      as.factor(year) + as.factor(ccode), cluster = "super_ID",
                    family = "binomial",
                    data = dat)


pdf <- data.frame(model = c("logit","logit FE"),
                  covariate = c("population density"),
                  estimate = c(summary(p3_1)$coef[10,1], 
                                summary(f3_1)[10,1]),
                  lower = c(summary(p3_1)$coef[10,1] - 1.96*summary(p3_1)$coef[10,2],
                            summary(f3_1)[10,1] - 1.96*summary(f3_1)[10,2]),
                  upper = c(summary(p3_1)$coef[10,1] + 1.96*summary(p3_1)$coef[10,2],
                            summary(f3_1)[10,1] + 1.96*summary(f3_1)[10,2]))


vdf <- data.frame(model = c("logit","logit FE"),
                  covariate = c("variance in ruggedness"),
                  estimate = c(summary(p3_1)$coef[2,1], 
                               summary(f3_1)[2,1]),
                  lower = c(summary(p3_1)$coef[2,1] - 1.96*summary(p3_1)$coef[2,2],
                            summary(f3_1)[2,1] - 1.96*summary(f3_1)[2,2]),
                  upper = c(summary(p3_1)$coef[2,1] + 1.96*summary(p3_1)$coef[2,2],
                            summary(f3_1)[2,1] + 1.96*summary(f3_1)[2,2]))

df <- rbind(pdf, vdf)

ggplot(df, aes(x = estimate, y = covariate, colour = model)) + 
  geom_point(position=position_dodge(width=0.5), size = 2.5) +
  geom_errorbar(aes(xmin = lower, xmax = upper, width=.1),
                position=position_dodge(width=0.5), size = 1.25) +
  xlim(-.75,.75) +
  geom_vline(aes(xintercept = 0),linetype = "dashed") +
  ggtitle("Estimated effects of key covariates on civil resistance") +
  theme_classic() +
  theme(axis.text=element_text(size=12),
         axis.title=element_text(size=14,face="bold"))

#########################################################################
##                  REGRESSING DENSITY ON TERRAIN                      ##
#########################################################################

#regressions of population density on terrain and other variables
## models without fixed effects
# OLS model 1
lm.pop.model1 <- lm(pop_density_t1 ~ rugg_var + cap_dist000 + 
                      contig_dist000 + area_scale + A_Wh_mean_pr +
                      A_WR_mean_pr + A_DR_mean_pr,
                    data = dat%>%filter(dup92==1))
## OLS model 2
lm.pop.model2 <- lm(pop_density_t1 ~ rugged + cap_dist000 + 
                      contig_dist000 + area_scale + A_Wh_mean_pr +
                      A_WR_mean_pr + A_DR_mean_pr,
                    data = dat%>%filter(dup92==1))
## OLS model 3
lm.pop.model3 <- lm(pop_density_t1 ~ rugg_var + rugged + cap_dist000 + 
                      contig_dist000 + area_scale + A_Wh_mean_pr +
                      A_WR_mean_pr + A_DR_mean_pr,
                    data = dat%>%filter(dup92==1))
## models with fixed effects
# formula | fixed effects | 0 instr. vars. | cluster 
# FE model 1
felm.pop.model1 <- felm(pop_density_t1 ~ rugg_var + cap_dist000 + 
                          contig_dist000 + area_scale + A_Wh_mean_pr +
                          A_WR_mean_pr + A_DR_mean_pr
                        | ccode | 0 | super_ID,
                        data = dat%>%filter(dup92==1))
## FE model 2
felm.pop.model2 <- felm(pop_density_t1 ~ rugged + cap_dist000 + 
                          contig_dist000 + area_scale + A_Wh_mean_pr +
                          A_WR_mean_pr + A_DR_mean_pr
                        | ccode | 0 | super_ID,
                        data = dat%>%filter(dup92==1))
## FE model 3
felm.pop.model3 <- felm(pop_density_t1 ~ rugg_var + rugged + cap_dist000 + 
                          contig_dist000 + area_scale + A_Wh_mean_pr +
                          A_WR_mean_pr + A_DR_mean_pr
                        | ccode | 0 | super_ID,
                        data = dat%>%filter(dup92==1))
## table in body of paper
stargazer(lm.pop.model3,felm.pop.model3,
          title="Terrain and Population Density",
          type='latex',
          notes = "Standard errors in parentheses.",
          dep.var.labels   = c("Population Density at Province Level"),
          keep.stat = c("n"))
## table in appendix
stargazer(lm.pop.model1,lm.pop.model2,lm.pop.model3,felm.pop.model1,felm.pop.model2,felm.pop.model3,
          title="Terrain and Population Density",
          type='latex',
          notes = "Standard errors in parentheses.",
          dep.var.labels   = c("Population Density at Province Level"),
          keep.stat = c("n"))

#########################################################################
##                      CAUSAL MEDIATION ANALYSIS                      ##
#########################################################################

set.seed(02138)
#treatment defined as 90th percentile of variance in ruggedness
dens_eq_r_90 <- lm(pop_density~rugg_var_90 + region, data=med)
nonv_eq_r_90 <- lm(nv_sum~pop_density+rugg_var_90 + region, data= med)
mediation_nv_r_90 <- mediate(dens_eq_r_90, nonv_eq_r_90, sims=500, 
                             treat="rugg_var_90", mediator="pop_density")

# treatment defined as 80th percentile of variance in ruggedness
dens_eq_r_80 <- lm(pop_density~rugg_var_80 + region, data=med)
nonv_eq_r_80 <- lm(nv_sum~pop_density+rugg_var_80 + region, data= med)
mediation_nv_r_80 <- mediate(dens_eq_r_80, nonv_eq_r_80, sims=500, 
                             treat="rugg_var_80", mediator="pop_density")

# treatment defined as 70th percentile of variance in ruggedness
dens_eq_r_70 <- lm(pop_density~rugg_var_70 + region, data=med)
nonv_eq_r_70 <- lm(nv_sum~pop_density+rugg_var_70 + region, data= med)
mediation_nv_r_70 <- mediate(dens_eq_r_70, nonv_eq_r_70, sims=500, 
                             treat="rugg_var_70", mediator="pop_density")

# regression and mediation analysis results for baseline model reported in main text
# make table for mediation regressions
stargazer(dens_eq_r_70, nonv_eq_r_70, keep = c("pop_density", "rugg_var_70"), 
          order = c("rugg_var_70", "pop_density"), covariate.labels = c("Variance in ruggedness", "Population density"), 
          dep.var.labels = c("Population density", "Number of MCR campaigns"))

# source for data in table reporting main results of mediation analysis
summary(mediation_nv_r_70)

# treatment defined as 60th percentile of variance in ruggedness
dens_eq_r_60 <- lm(pop_density~rugg_var_60 + region, data=med)
nonv_eq_r_60 <- lm(nv_sum~pop_density+rugg_var_60 + region, data= med)
mediation_nv_r_60 <- mediate(dens_eq_r_60, nonv_eq_r_60, sims=500, 
                             treat="rugg_var_60", mediator="pop_density")

# treatment defined as 50th percentile of variance in ruggedness
dens_eq_r_50 <- lm(pop_density~rugg_var_50 + region, data=med)
nonv_eq_r_50 <- lm(nv_sum~pop_density+rugg_var_50 + region, data= med)
mediation_nv_r_50 <- mediate(dens_eq_r_50, nonv_eq_r_50, sims=500, 
                             treat="rugg_var_50", mediator="pop_density")

### causal mediation plot
# make data frame
treatment_threshold <- c(0.5,0.6,0.7,0.8,0.9)
acme_lower <- c(mediation_nv_r_50$d0.ci[1], mediation_nv_r_60$d0.ci[1], mediation_nv_r_70$d0.ci[1], mediation_nv_r_80$d0.ci[1], mediation_nv_r_90$d0.ci[1])
acme_point <- c(mediation_nv_r_50$d0, mediation_nv_r_60$d0, mediation_nv_r_70$d0, mediation_nv_r_80$d0, mediation_nv_r_90$d0)
acme_upper <- c(mediation_nv_r_50$d0.ci[2], mediation_nv_r_60$d0.ci[2], mediation_nv_r_70$d0.ci[2], mediation_nv_r_80$d0.ci[2], mediation_nv_r_90$d0.ci[2])
ade_lower <- c(mediation_nv_r_50$z0.ci[1], mediation_nv_r_60$z0.ci[1], mediation_nv_r_70$z0.ci[1], mediation_nv_r_80$z0.ci[1], mediation_nv_r_90$z0.ci[1])
ade_point <- c(mediation_nv_r_50$z0, mediation_nv_r_60$z0, mediation_nv_r_70$z0, mediation_nv_r_80$z0, mediation_nv_r_90$z0)
ade_upper <- c(mediation_nv_r_50$z0.ci[2], mediation_nv_r_60$z0.ci[2], mediation_nv_r_70$z0.ci[2], mediation_nv_r_80$z0.ci[2], mediation_nv_r_90$z0.ci[2])
med_plot_data <- tibble(treatment_threshold = treatment_threshold, acme_lower = acme_lower, acme_point = acme_point, acme_upper = acme_upper, ade_lower = ade_lower, ade_point = ade_point, ade_upper = ade_upper)

#make plot for different treatment thresholds
ggplot(data = med_plot_data, mapping = aes(x=treatment_threshold,y=acme_upper)) + 
  geom_pointrange(mapping=aes(x=treatment_threshold, y=acme_point, ymin=acme_lower, ymax=acme_upper), color="blue", alpha=0.4) + 
  geom_pointrange(mapping=aes(x=treatment_threshold, y=ade_point, ymin=ade_lower, ymax=ade_upper), color="red", alpha=0.4) + 
  geom_hline(yintercept = 0, linetype = "dashed") + 
  labs(title = "Estimated Direct and Mediated Causal Effects of Terrain Ruggedness", x = "Treatment Threshold", y = "Effect on Nonviolent Movement Incidence") + 
  theme_classic()

#########################################################################
##                             APPENDIX                                ##
#########################################################################

## table comparing means of variables for provinces with and without names
## in the year 1992

mean_tab <- dat %>%
  filter(dup92 == 1) %>%
  group_by(prim_meth) %>%
  summarize(across(one_of('rugg_var', 'rugged', 'ethgroups', 'excluded', 'cap_dist000',
                          'larea_scale', 'durable0_t1', 'lngdp_pc_t1', 'lpop_density_t1'), mean, na.rm = TRUE)) %>%
  t() %>%
  as_tibble(rownames = 'mean') %>%
  slice(-1) %>%
  rename(no_nv_mcr = V1,
         nv_mcr = V2,
         no_name = V3)

print(xtable(mean_tab, caption = 'Variable means by value of prim_meth'))

## table comparing variances (1992) 
var_tab <- dat %>%
  filter(dup92 == 1) %>%
  group_by(prim_meth) %>%
  summarize(across(one_of('rugg_var', 'rugged', 'ethgroups', 'excluded', 'cap_dist000',
                          'larea_scale', 'durable0_t1', 'lngdp_pc_t1', 'lpop_density_t1'), var, na.rm = TRUE)) %>%
  t() %>%
  as_tibble(rownames = 'variance') %>%
  slice(-1) %>%
  rename(no_nv_mcr = V1,
         nv_mcr = V2,
         no_name = V3)

print(xtable(var_tab, caption = 'Variable variances by value of prim_meth'))

## correlation plot
cor_dat <- subset(dat, select = c(rugg_var, rugged, ethgroups, cap_dist000, 
                                  area_scale, durable0_t1, lngdp_pc_t1, pop_density_t1))
cor_mat <- cor(cor_dat, use = 'pairwise.complete.obs')
corr_plot <- corrplot(cor_mat, method = 'color', tl.col = 'black', tl.cex = 0.8, tl.srt = 70)

## make violin plots comparing provinces with nonviolent and violent resistance events
rugg_vp_vnv <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(v_nv)) %>%
  ggplot(aes(y = rugg_var, x = factor(v_nv))) +
  geom_violin(aes(fill = factor(v_nv))) +
  geom_boxplot(width = 0.1) +
  ylab('variance in ruggedness') +
  theme(legend.position = 'right', axis.title.x = element_blank()) +
  scale_fill_discrete(name = 'campaign', labels = c('viol. (0)', 'non-viol. (1)'))

rugged_vp_vnv <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(v_nv)) %>%
  ggplot(aes(y = rugged, x = factor(v_nv))) +
  geom_violin(aes(fill = factor(v_nv))) +
  geom_boxplot(width = 0.1) +
  ylab('mean ruggedness') +
  theme(legend.position = 'none', axis.title.x = element_blank())

ethgroups_vp_vnv <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(v_nv)) %>%
  ggplot(aes(y = ethgroups, x = factor(v_nv))) +
  geom_violin(aes(fill = factor(v_nv))) +
  geom_boxplot(width = 0.1) +
  ylab('total number of groups') +
  theme(legend.position = 'none', axis.title.x = element_blank())

excluded_vp_vnv <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(v_nv)) %>%
  ggplot(aes(y = excluded, x = factor(v_nv))) +
  geom_violin(aes(fill = factor(v_nv))) +
  geom_boxplot(width = 0.1) +
  ylab('number of groups excluded') +
  theme(legend.position = 'none', axis.title.x = element_blank())

capdist_vp_vnv <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(v_nv)) %>%
  ggplot(aes(y = cap_dist000, x = factor(v_nv))) +
  geom_violin(aes(fill = factor(v_nv))) +
  geom_boxplot(width = 0.1) +
  ylab('distance to capitol') +
  theme(legend.position = 'none', axis.title.x = element_blank())

area_vp_vnv <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(v_nv)) %>%
  ggplot(aes(y = larea_scale, x = factor(v_nv))) +
  geom_violin(aes(fill = factor(v_nv))) +
  geom_boxplot(width = 0.1) +
  ylab('log unit area') +
  theme(legend.position = 'none', axis.title.x = element_blank())

gdp_vp_vnv <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(v_nv)) %>%
  ggplot(aes(y = lngdp_pc, x = factor(v_nv))) +
  geom_violin(aes(fill = factor(v_nv))) +
  geom_boxplot(width = 0.1) +
  ylab('log gdp per capita') +
  theme(legend.position = 'none', axis.title.x = element_blank())

pop_vp_vnv <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(v_nv)) %>%
  ggplot(aes(y = lpop_density, x = factor(v_nv))) +
  geom_violin(aes(fill = factor(v_nv))) +
  geom_boxplot(width = 0.1) +
  ylab('log population density') +
  theme(legend.position = 'none', axis.title.x = element_blank())

durable_vp_vnv <- dat %>%
  filter(dup92 == 1) %>%
  filter(!is.na(v_nv)) %>%
  ggplot(aes(y = durable0, x = factor(v_nv))) +
  geom_violin(aes(fill = factor(v_nv))) +
  geom_boxplot(width = 0.1) +
  ylab('regime durability') +
  theme(legend.position = 'none', axis.title.x = element_blank())

## combine violent-nonviolent violin plots into a single object
legend_vnv <- gtable_filter(ggplotGrob(rugg_vp_vnv), 'guide-box')

arrange_vp_vnv <- grid.arrange(arrangeGrob(rugg_vp_vnv + theme(legend.position = 'none'),
                                           rugged_vp_vnv, ethgroups_vp_vnv, excluded_vp_vnv, capdist_vp_vnv,
                                           area_vp_vnv, gdp_vp_vnv, pop_vp_vnv, durable_vp_vnv,
                                           nrow = 3),
                               legend_vnv, widths = unit.c(unit(1, 'npc') - legend$width, legend$width), 
                               nrow = 1)

## tables for GLM and GLM with fixed effects
stargazer(p3_1,digits = 3, keep.stat = c("n"))
summary(f3_1)

## media bias robustness check for logit
media_3_1 <- glm(prim_meth ~ rugg_var + ethgroups + cap_dist000 +  
              area_scale + democ_dum_t1 + durable0_t1 + 
              democ_dur_t1 + lngdp_pc_t1 + pop_density_t1, 
            family = binomial,
            data = dat %>% filter(pop_density_t1 > 0.136))

stargazer(media_3_1, digits = 3, keep.stat = c("n"))

## media bias robustness check for logit with fixed effects 
media_f3_1 <- glm.cluster(prim_meth ~ rugg_var + ethgroups + cap_dist000 +  
                      area_scale + democ_dum_t1 + durable0_t1 + 
                      democ_dur_t1 + lngdp_pc_t1 + pop_density_t1 +
                      as.factor(year) + as.factor(ccode), cluster = "super_ID",
                    family = "binomial",
                    data = dat %>% filter(pop_density_t1 > 0.136))


## robustness check for OLS regressions
## table comparing linear and logit model fits
rlinear_3_1 <- lm(prim_meth ~ rugg_var + rugged + ethgroups + 
                   cap_dist000 +  area_scale + democ_dum_t1 + 
                   durable0_t1 + democ_dur_t1 + lngdp_pc_t1 + pop_density_t1,
                 data = dat)

linear3_2 <- lm(formula = prim_meth ~ rugg_var + rugged + excluded_t1 + 
              cap_dist000 +  area_scale + democ_dum_t1 + 
              durable0_t1 + democ_dur_t1 + lngdp_pc_t1 + pop_density_t1,
            data = dat)

linear3_3 <- lm(formula = prim_meth ~ rugg_var + rugged + percexcl_t1 + 
              cap_dist000 +  area_scale + democ_dum_t1 + 
              durable0_t1 + democ_dur_t1 + lngdp_pc_t1 + pop_density_t1,
            data = dat)

linear3_4 <- lm(formula = prim_meth ~ rugg_var + rugged + ethgroups + 
              excluded_t1 + percexcl_t1  +  cap_dist000 +  
              area_scale + democ_dum_t1 + durable0_t1 + democ_dur_t1 + 
              lngdp_pc_t1 + pop_density_t1, data = dat)

stargazer(rlinear_3_1,linear3_2,linear3_3,linear3_4,
          digits = 3, keep.stat = c("n"))

## robustness check for logistic regressions
rp3_1 <- glm(prim_meth ~ rugg_var + rugged + ethgroups + 
              cap_dist000 +  area_scale + democ_dum_t1 + 
              durable0_t1 + democ_dur_t1 + lngdp_pc_t1 + pop_density_t1, 
            family = binomial,
            data = dat)

p3_2 <- glm(formula = prim_meth ~ rugg_var + rugged + excluded_t1 + 
              cap_dist000 +  area_scale + democ_dum_t1 + 
              durable0_t1 + democ_dur_t1 + lngdp_pc_t1 + pop_density_t1,
            family = binomial,
            data = dat)

p3_3 <- glm(formula = prim_meth ~ rugg_var + rugged + percexcl_t1 + 
              cap_dist000 +  area_scale + democ_dum_t1 + 
              durable0_t1 + democ_dur_t1 + lngdp_pc_t1 + pop_density_t1,
            family = binomial,
            data = dat)

p3_4 <- glm(formula = prim_meth ~ rugg_var + rugged + ethgroups + 
              excluded_t1 + percexcl_t1  +  cap_dist000 +  
              area_scale + democ_dum_t1 + durable0_t1 + democ_dur_t1 + 
              lngdp_pc_t1 + pop_density_t1, family = binomial, 
            data = dat)

stargazer(rp3_1,p3_2,p3_3,p3_4,digits = 3, keep.stat = c("n"))

## robustness check for OLS fixed effects model 
m3_1 <- felm(formula = prim_meth ~ rugg_var + rugged + ethgroups + 
               cap_dist000 +  area_scale + democ_dum_t1 + 
               durable0_t1 + democ_dur_t1 + lngdp_pc_t1 + pop_density_t1 
             | year + ccode | 0 | super_ID, data = dat)

m3_2 <- felm(formula = prim_meth ~ rugg_var + rugged + excluded_t1 + 
               cap_dist000 +  area_scale + democ_dum_t1 + 
               durable0_t1 + democ_dur_t1 + lngdp_pc_t1 + pop_density_t1  
             | year + ccode | 0 | super_ID, 
             data = dat)

m3_3 <- felm(formula = prim_meth ~ rugg_var + rugged + percexcl_t1 + 
               cap_dist000 +  area_scale + democ_dum_t1 + 
               durable0_t1 + democ_dur_t1 + lngdp_pc_t1 + pop_density_t1
             | year + ccode | 0 | super_ID, 
             data = dat)

m3_4 <- felm(formula = prim_meth ~ rugg_var + rugged + ethgroups + 
               excluded_t1 + percexcl_t1  +  cap_dist000 +  
               area_scale + democ_dum_t1 + durable0_t1 + democ_dur_t1 + 
               lngdp_pc_t1 + pop_density_t1 | year + ccode | 0 | super_ID, 
             data = dat)

stargazer(m3_1,m3_2,m3_3,m3_4,digits = 3, keep.stat = c("n"))

# mediation analysis robustness check with added controls 
dens_eq_r_70c <- lm(pop_density~rugg_var_70 + region + A_Wh_mean_pr + A_WR_mean_pr + A_DR_mean_pr, data=med)
nonv_eq_r_70c <- lm(nv_sum~pop_density+rugg_var_70 + region + A_Wh_mean_pr + A_WR_mean_pr + A_DR_mean_pr, data= med)
mediation_nv_r_70c <- mediate(dens_eq_r_70c, nonv_eq_r_70c, sims=500, 
                              treat="rugg_var_70", mediator="pop_density")

# make table for mediation regressions robustness check
stargazer(dens_eq_r_70c, nonv_eq_r_70c, keep = c("pop_density", "rugg_var_70", "A_Wh_mean_pr", "A_WR_mean_pr", "A_DR_mean_pr"), 
          order = c("rugg_var_70", "pop_density", "A_Wh_mean_pr", "A_WR_mean_pr", "A_DR_mean_pr"), 
          covariate.labels = c("Variance in ruggedness", "Population density", "Wheat suitability", "Wet rice suitability", "Dry rice suitability"), 
          dep.var.labels = c("Population density", "Number of MCR campaigns"), title = c("Mediator and Outcome Estimates with Added Controls"))

# source of information for mediation analysis robustness check table
summary(mediation_nv_r_70c)

# sensitivity test for baseline mediation analysis
sens.out <- medsens(mediation_nv_r_70, rho.by = 0.1, effect.type = "indirect", sims = 100)
summary(sens.out)
plot(sens.out)




